# Acting under Chapter 7: Data Collection and Pre-Processing #
# Author: Johannes Scherzinger #
# Date: 05.02.2021 #

# Packages
library(tidyverse)

# Run this Script first to replicate research in "Acting Under Chapter 7".


#### Data Augmentation ####

# Begin by using Schoenfeld et al. UNSC Debates data (2019) <https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/KGVSYH>
# Reduce the raw documents to the speech text and rename the document id to
# `filename`, the key on which they are merged with the speech meta data
raw_docs <- raw_docs %>%
  select(text, filename = doc_id)


# Attach the speech text to the speech meta data (matched on "filename");
# every row of the meta data is kept
UNSC_speeches <- meta_speeches %>%
  left_join(raw_docs, by = "filename")


# Merge with the original UN Resolution Corpus (UNRESO 2021), containing all
# resolutions from 1990-2018. The "spv" number serves as the join key; only
# rows with a matching "spv" in both tables are kept
UNSC_raw <- UNSC_speeches %>%
  inner_join(UN_RESO, by = "spv")

# check for duplicates: count the rows of the merged data whose filename
# already occurred in an earlier row
sum(duplicated(UNSC_raw$filename))

# remove duplicates: keep the first row per filename. The input is the merged
# object `UNSC_raw` created above, and the column is referenced by its bare
# name -- `distinct(UNSC$filename, ...)` would append a spurious column
# literally named "UNSC$filename" to the result.
UNSC <- UNSC_raw %>%
  distinct(filename, .keep_all = TRUE) # 290 Duplicate Rows removed

